Switch to bioconductor mode
library(pagoda2)
Warning message:
package ‘igraph’ was built under R version 3.5.3
library(conos)
library(parallel)
library(magrittr)
library(ggplot2)
library(pbapply)
library(tibble)
library(dplyr)
library(ggrastr)
library(cowplot)
library(ggbeeswarm)
library(readr)
library(pheatmap)
library(reshape2)
library(clusterProfiler)
library(DOSE)
library(org.Hs.eg.db)
library(enrichplot)
require(ComplexHeatmap)
require(circlize)
Load de results:
x <- list.files(path='..',pattern='de.*.rds'); names(x) <- gsub("de.(.*).rds","\\1",x)
del <- lapply(x,function(n) readRDS(paste0('../',n)))
Prepare GO categories …
go_datas <- c("BP", "CC", "MF") %>% setNames(., .) %>%
pblapply(function(n) clusterProfiler:::get_GO_data(org.Hs.eg.db, n, "ENTREZID") %>%
as.list() %>% as.environment()) # otherwise it pass reference to the environment content
Run enrichment tests
enrichGOOpt <- function (gene, OrgDB, goData, keyType = "ENTREZID", ont = "MF", pvalueCutoff = 0.05,
pAdjustMethod = "BH", universe=NULL, qvalueCutoff = 0.2, minGSSize = 10,
maxGSSize = 500, readable = FALSE, pool = FALSE) {
ont %<>% toupper %>% match.arg(c("BP", "CC", "MF"))
res <- clusterProfiler:::enricher_internal(gene, pvalueCutoff = pvalueCutoff,
pAdjustMethod = pAdjustMethod, universe = universe,
qvalueCutoff = qvalueCutoff, minGSSize = minGSSize,
maxGSSize = maxGSSize, USER_DATA = goData)
if (is.null(res))
return(res)
res@keytype <- keyType
res@organism <- clusterProfiler:::get_organism(OrgDB)
if (readable) {
res <- DOSE::setReadable(res, OrgDB)
}
res@ontology <- ont
return(res)
}
distanceBetweenTerms <- function(go.df) {
genes.per.go <- sapply(go.df$geneID, strsplit, "/") %>% setNames(go.df$Description)
all.go.genes <- unique(unlist(genes.per.go))
all.gos <- unique(go.df$Description)
genes.per.go.mat <- matrix(0, length(all.go.genes), length(all.gos)) %>%
`colnames<-`(all.gos) %>% `rownames<-`(all.go.genes)
for (i in 1:length(genes.per.go)) {
genes.per.go.mat[genes.per.go[[i]], go.df$Description[[i]]] <- 1
}
return(dist(t(genes.per.go.mat), method="binary"))
}
calculate.gos <- function(de,n.top.genes=300,n.cores=1) {
de <- de[unlist(lapply(de,is.list))]
# add Z scores
de <- lapply(de,function(d) {
res.table <- d$res;
res.table$Z <- -qnorm(res.table$pval/2)
res.table$Z[is.na(res.table$Z)] <- 0
res.table$Za <- -qnorm(res.table$padj/2)
res.table$Za[is.na(res.table$Za)] <- 0
res.table$Z <- res.table$Z * sign(res.table$log2FoldChange)
res.table$Za <- res.table$Za * sign(res.table$log2FoldChange)
d$res <- res.table;
d
})
gns <- list(down=lapply(de,function(x) rownames(x$res)[order(x$res$Z,decreasing=F)[1:n.top.genes]]),
up=lapply(de,function(x) rownames(x$res)[order(x$res$Z,decreasing=T)[1:n.top.genes]]),
all=list(all=unique(unlist(lapply(de,function(x) rownames(x$res))))))
gns.entrez <- lapply(gns,function(x) lapply(x, bitr, 'SYMBOL', 'ENTREZID', org.Hs.eg.db) %>% lapply(`[[`, "ENTREZID"))
gos <- lapply(gns.entrez[c('up','down')],function(gns) {
lapply(gns,enrichGOOpt,universe=gns.entrez$all$all, ont='BP', goData=go_datas[['BP']], readable=T, OrgDB=org.Hs.eg.db)# %>% lapply(function(x) x@result)
})
}
gos.cluster <- function(gos,n.clusters=20,max.pval=0.05) {
gos_filt <- lapply(gos,function(x) filter(x@result,p.adjust<max.pval))
gos_joint <- do.call(rbind,gos_filt)
gos_joint <- gos_filt %>% .[sapply(., nrow) > 0] %>% names() %>% setNames(., .) %>% lapply(function(n) cbind(gos_filt[[n]],Type=n)) %>% Reduce(rbind,.)
go_dist <- distanceBetweenTerms(gos_joint)
clusts <- hclust(go_dist,method='ward.D2') %>% cutree(n.clusters)
gos_per_clust <- split(names(clusts), clusts)
ngos_per_clust <- sapply(gos_per_clust, length)
#table(clusts)
gos_per_clust <- split(names(clusts), clusts)
gos_joint %<>% mutate(GOClust=clusts[Description])
name_per_clust <- gos_joint %>% group_by(GOClust, Description) %>% summarise(pvalue=exp(mean(log(pvalue)))) %>%
split(.$GOClust) %>% sapply(function(df) df$Description[which.min(df$pvalue)])
gos_joint %<>% mutate(GOClustName=name_per_clust[as.character(GOClust)])
# cluster summary
go_bp_summ_df <- gos_joint %>% group_by(Type, GOClustName) %>%
summarise(p.adjust=min(p.adjust)) %>% ungroup() %>% mutate(p.adjust=-log10(p.adjust)) %>%
tidyr::spread(Type, p.adjust) %>% as.data.frame() %>% set_rownames(.$GOClustName) %>% .[, 2:ncol(.)] #%>% .[, type_order[type_order %in% colnames(.)]]
go_bp_summ_df[is.na(go_bp_summ_df)] <- 0
return(list(joint=gos_joint,clusters=clusts,summary=go_bp_summ_df))
}
Calculate enrichments
gosl <- lapply(del,calculate.gos)
Calculate and plot clusters:
cols <- list(up=colorRamp2(c(0, 6), c("grey98", "red")),down=colorRamp2(c(0, 6), c("grey98", "blue")))
n.clusters <- 20; max.pval <- 0.05;
cx <- lapply(gosl$IB,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.IB.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.IB.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
Figure output with fixed cell type ordering
ord <- hclust(dist(t(rbind(cx$up$summary,cx$down$summary))))$order;
mu <- Heatmap(as.matrix(cx$up$summary[,ord]),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),column_names_gp = gpar(fontsize = 11),cluster_columns=F,show_heatmap_legend =F)
md <- Heatmap(as.matrix(cx$down$summary[,ord]),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),column_names_gp = gpar(fontsize = 11),cluster_columns=F,show_heatmap_legend =F)
pdf(file='de.IB.up.pdf',width=5.5,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.IB.down.pdf',width=5.5,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
cx <- lapply(gosl$TB,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.TB.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.TB.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
cx <- lapply(gosl$ID,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.ID.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.ID.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
Try plotting individual results:
x <- gosl2$IB$up$Progenitors
barplot(x, showCategory=20)
x <- gosl$IB$up$Progenitors
p <- dotplot(x, showCategory=20,title='Upregulated in Progenitors') +scale_colour_gradient(low = "red", high = "gray80") +xlab('fraction of genes') +scale_x_continuous(breaks=pretty(unlist(lapply(x@result$GeneRatio[1:20],function(x) eval(parse(text=x)))),3))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
ggsave('IB.Progenitor.up.pdf',width=6,height=6,plot=p)
p
x <- gosl$IB$down$Progenitors
p <- dotplot(x, showCategory=20,title='Downregulated in Progenitors') +scale_colour_gradient(low = "blue", high = "gray80")+xlab('fraction of genes')+scale_x_continuous(breaks=pretty(unlist(lapply(x@result$GeneRatio[1:20],function(x) eval(parse(text=x)))),3))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
ggsave('IB.Progenitor.down.pdf',width=6,height=6,plot=p)
p
x <- gosl2$IB$up$pDC
p <- dotplot(x, showCategory=20,title='Upregulated in pDC') +scale_colour_gradient(low = "red", high = "gray80") +xlab('fraction of genes')
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
ggsave('IB.pDC.up.pdf',width=8,height=6,plot=p)
p
x <- gosl2$IB$down$pDC
p <- dotplot(x, showCategory=20,title='Downregulated in pDC') +scale_colour_gradient(low = "blue", high = "gray80")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
ggsave('IB.pDC.down.pdf',width=8,height=6,plot=p)
p
Load Shenglin’s results
pw <- '/home/meisl/Workplace/BMME/Revision/DE/'
x <- list(cTB='de.correction.Tumor.vs.Benign.rds',pID='de.paired.Involved.vs.Distal.rds',pTD='de.paired.Tumor.vs.Distal.rds')
del2 <- lapply(x,function(x) readRDS(paste0(pw,x)))
del2$pID <- readRDS("../de.pID.rds")
del2$pTD <- readRDS("../de.pTD.rds")
gosl2 <- lapply(del2,calculate.gos)
Corrected Tumor vs. Benign
cx <- lapply(gosl2$cTB,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.cTB.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.cTB.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
Paired Tumor vs. Distal
cx <- lapply(gosl2$pTD,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.pTD.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.pTD.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
Paired Involved vs. Distal
cx <- lapply(gosl2$pID,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
mu <- Heatmap(as.matrix(cx$up$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
md <- Heatmap(as.matrix(cx$down$summary),col=cols$down,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10))
pdf(file='de.pID.up.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
null device
1
pdf(file='de.pID.down.pdf',width=7,height=4.5); draw(md, heatmap_legend_side = "left"); dev.off();
null device
1
mu; md;
Redraw heatmaps
sn <- function(x) setNames(x,x)
drawOverlapHetmap=function(Defile,dec=NULL,n.top.genes=300){
nn=names(Defile)
sname=nn[unlist(lapply(Defile, function(x) 'res' %in% names(x)))]
glist= lapply(sn(sname), function(x){
tmp1=Defile[[x]]$res
tmp1=addZscore(tmp1,dec=dec)
rownames(tmp1)[1:n.top.genes]
})
Zscore= lapply(sn(sname), function(x){
tmp1=Defile[[x]]$res
tmp1=addZscore(tmp1,dec=dec)
ss=tmp1$Z
names(ss)=rownames(tmp1)
ss
})
rr=listToOverlapMatrix2(glist)
for( i in seq(nrow(rr))){
rr[i,i]=NA
}
rr
}
addZscore=function(res.table,dec=NULL){
res.table$Z <- -qnorm(res.table$pval/2)
res.table$Z[is.na(res.table$Z)] <- 0
res.table$Za <- -qnorm(res.table$padj/2)
res.table$Za[is.na(res.table$Za)] <- 0
res.table$Z <- res.table$Z * sign(res.table$log2FoldChange)
res.table$Za <- res.table$Za * sign(res.table$log2FoldChange)
res.table=res.table[order(res.table$Z),]
if (!is.null(dec)){
res.table=res.table[order(res.table$Z,decreasing=dec),]
} else { # otherwise sort by absolute magnitude of Z
res.table <- res.table[order(abs(res.table$Z),decreasing=T),]
}
return(res.table)
}
listToOverlapMatrix2=function(res,counts=NULL){
lc=length(res)
stat=matrix(rep(0,lc*lc),lc,lc)
for( i in seq(lc)){
for (j in seq(lc)){
if( i!=j){
stat[i,j]=length(intersect(res[[i]],res[[j]]))/length(union(res[[i]],res[[j]]))
if (!is.null(counts)){stat[i,j]=length(intersect(res[[i]],res[[j]])) }
}
}
}
colnames(stat)=names(res)
rownames(stat)=names(res)
return(stat)
}
rr <- drawOverlapHetmap(del$IB,dec=NULL,n.top.genes=300)
pheatmap(rr,na_col = "gray50",filename='de.IB.overlap.pdf',height=4.3,width=5,treeheight_row=25,treeheight_col=25)
pheatmap(rr,na_col = "gray50",treeheight_row=25,treeheight_col=25)
Compare pared and regular tests
x <- del$ID; y <- del2$pID;
x <- del$TD; y <- del2$pTD;
x <- x[unlist(lapply(x,is.list))]
y <- y[unlist(lapply(y,is.list))]
sum(unlist(lapply(x,function(x) sum(x$res$pvalue<1e-3,na.rm=T))))
[1] 2576
sum(unlist(lapply(y,function(x) sum(x$res$pvalue<1e-3,na.rm=T))))
[1] 2355
scon <- Conos$new(readRDS("~pkharchenko/m/scadden/bmmet/jan2019/scon.rds"))
nfac <- readRDS("../nfac.rds")
#nfac <- readRDS("/d0-mendel/home/meisl/Workplace/BMME/Figures/data/cell.ano.merged.rds")
tumor.cells <- names(nfac)[nfac=='Tumor']
tt <- table(scon$getDatasetPerCell()[tumor.cells])
valid.samples <- names(tt)[tt>10]
cdl <- lapply(scon$samples[valid.samples],function(x) t(x$misc$rawCounts[rownames(x$misc$rawCounts) %in% tumor.cells,,drop=F]))
names(cdl) <- gsub("Noninvolved","Distal",names(cdl))
cdl <- lapply(cdl,function(m) { colnames(m) <- gsub("Noninvolved","Distal",colnames(m)); m})
tp2 <- mclapply(cdl,basicP2proc, nPcs=3, get.tsne=T, get.largevis=F, make.geneknn=F, trim=0 , min.cells.per.gene = -1, n.cores=1,n.odgenes=1000,mc.cores=30)
Conos
tcon <- Conos$new(tp2,n.cores=1)
tcon$buildGraph(k = 10,k.self = 5,ncomps = 10,metric = 'L2',n.odgenes = 1000)
found 0 out of 45 cached PCA space pairs ... running 45 additional PCA space pairs ............................................. done
inter-sample links using mNN ............................................. done
local pairs local pairs .......... done
building graph ..done
# store different embeddings
tcon$misc$embeddings <- list()
#tcon$embedGraph(method='UMAP');
#tcon$misc$embeddings$umap <- tcon$embedding
tcon$embedGraph(sgd_batches=0.1e8,apha=1);
Estimating embeddings.
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
tcon$misc$embeddings$lv <- tcon$embedding
tcon$findCommunities(method=leiden.community,resolution=1)
alpha <- 0.3; size <- 2;
tcon$embedding <- tcon$misc$embeddings$lv
n1 <- tcon$plotGraph(color.by='sample',alpha=alpha,size=size,mark.groups=T,plot.na=F)
#n2 <- tcon$plotGraph(gene='FOS',alpha=alpha,size=size,mark.groups=F,plot.na=F)
n2 <- tcon$plotGraph(alpha=alpha,size=size,mark.groups=T,plot.na=F)
plot_grid(plotlist=list(n1,n2),nrow=1)
tcon$plotGraph(colors=csm[,4],alpha=alpha,size=size,mark.groups=F,plot.na=F,gradient.range.quantile=0.97)
alpha <- 0.3; size <- 2;
tcon$embedding <- tcon$misc$embeddings$lv
n1 <- tcon$plotGraph(color.by='sample',alpha=alpha,size=size,mark.groups=F,plot.na=F)+theme(legend.position = 'right')+ guides(color=guide_legend(title="Sample",override.aes=list(size=5)))
ggsave('tcon.samples.pdf',n1,width=4.5,height=3)
n1
cm <- do.call(cbind,lapply(conos:::rawMatricesWithCommonGenes(Conos$new(tp2)),function(m) t(m[rownames(m)%in% tumor.cells,,drop=F])))
tcp2 <- basicP2proc(cm, nPcs=20, get.tsne=T, get.largevis=F, make.geneknn=F, trim=1 , min.cells.per.gene = 0, n.cores=30,n.odgenes=3000)
tcp2$plotEmbedding(type='PCA',groups=as.factor(scon$getDatasetPerCell()))
using provided groups as a factor
conos::embeddingPlot(tcp2$embeddings[[1]]$tSNE,groups=as.factor(scon$getDatasetPerCell()),size=2)
conos::embeddingPlot(tcp2$embeddings[[1]]$tSNE,colors=csm[,4],gradient.range.quantile=0.97)
# epithelial cell differentiation
# ELF3
conos::embeddingPlot(tcp2$embeddings[[1]]$tSNE,colors=tcp2$counts[,'SOX9'],gradient.range.quantile=0.97)
Project other datasets to the PCs of each dataset
common.genes <- Reduce(intersect, lapply(tp2,function(x) colnames(x$counts)))
n.top.pcs <- 1;
pcl <- lapply(tp2,function(r) {
x <- r$misc$PCA$v[,1:n.top.pcs,drop=F]
x <- x[rownames(x) %in% common.genes,,drop=F]
x <- t(t(x)/colSums(x*x))
})
# project
csm <- do.call(rbind,lapply(tp2,function(r) {
x <- r$counts;
#x@x <- x@x*rep(r$misc[['varinfo']][colnames(x),'gsf'],diff(x@p)); # apply variance scaling
center <- Matrix::colMeans(x)
pcs <- do.call(cbind,lapply(pcl,function(pcm) {
#z <- pcm[sample(1:nrow(pcm)),,drop=F]; rownames(z) <- rownames(pcm); pcm <- z;
pcas <- t(as.matrix(t(x[,rownames(pcm),drop=F] %*% pcm)) - as.vector(t(center[rownames(pcm)] %*% pcm)))
}))
if(n.top.pcs>1) { colnames(pcs) <- paste(rep(names(pcl),each=n.top.pcs),rep(1:n.top.pcs,length(pcl)),sep='_')} else { colnames(pcs) <- names(pcl)}
pcs
}))
Optimize orientations:
for(i in 1:10) {
cc <- cor(csm)
diag(cc) <- 0;
mi <- sort(apply(cc,1,function(x) x[which.max(abs(x))]))
if(mi[1]<0) {
flip.ind <- names(mi)[1]
csm[,flip.ind] <- -1*csm[,flip.ind]
}
}
Visualize
Scores
require(ComplexHeatmap)
sl <- unique(gsub("-.*","",colnames(csm))); sc <- setNames(rainbow(length(sl)),sl)
hc <- hclust(as.dist(1-abs(cor(csm,method='spearman'))))
#hc <- hclust(as.dist(1-cor(csm)))
plot(hc)
co <- hc$order
Heatmap(csm, cluster_rows = T, show_row_names = F,border=T, column_order=co,
bottom_annotation=HeatmapAnnotation(sample=factor(setNames(gsub("-.*","",colnames(csm)),colnames(csm)),levels=sl),col=list(sample=sc)),
right_annotation = HeatmapAnnotation(which='row',sample=factor(setNames(gsub("-.*","",rownames(csm)),rownames(csm)),levels=sl),col=list(sample=sc))
)
Fix remaining flips
csm[,co[c(2,3)]] <- -csm[,co[c(2,3)]]
csm[,co[c(7:10)]] <- -csm[,co[c(7:10)]]
csm[,co[c(1)]] <- -csm[,co[c(1)]]
#x <- csm;
#depth <- Matrix::colSums(cm)[rownames(csm)];
#depth <- log10(depth); depth <- (depth-mean(depth))*range(diff(depth))*2
#csm <- cbind(csm,depth=depth)
hm <- Heatmap(csm, name='PC scores', cluster_rows = T, show_row_names = F,border=T, cluster_columns = hc,
#bottom_annotation=HeatmapAnnotation(sample=factor(setNames(gsub("-.*","",colnames(csm)),colnames(csm)),levels=sl),col=list(sample=sc)),
#right_annotation = HeatmapAnnotation(which='row',sample=factor(setNames(gsub("-.*","",rownames(csm)),rownames(csm)),levels=sl),col=list(sample=sc)),
col=circlize::colorRamp2(c(-3, 0, 3), c('darkgreen','grey90','orange')),
row_split = as.factor(setNames(gsub("_.*","",rownames(csm)),rownames(csm))),row_title_rot=0,
use_raster = T,raster_device = "CairoPNG")
pdf(file='pc.heatmap.pdf',width=5,height=7); print(hm); dev.off()
null device
1
hm
alpha <- 0.3; size <- 2;
pl <- apply(csm,2,function(d) tcon$plotGraph(colors=d,alpha=alpha,size=size,mark.groups=F,plot.na=F,gradient.range.quantile=0.95,palette=colorRampPalette(c('darkgreen','grey90','orange'),space='Lab'))) %>%
mapply(function(x,y) x+ggtitle(y)+ theme(plot.title = element_text(size=16)),.,colnames(csm), SIMPLIFY=F)
#pl <- mapply(function(x,y) { x+ggtitle(y)},pl,colnames(csm),SIMPLIFY=F)
pl <- plot_grid(plotlist=pl[co],nrow=2)
pdf(file='tcon.PCs.pdf',width=10,height=4.4); print(pl); dev.off();
png
2
pl
alpha <- 0.8; size <- 4;
pl <- tcon$plotPanel(colors=csm[,'BMET10-Tumor'],palette=colorRampPalette(c('darkgreen','grey90','orange'),space='Lab'),size=size,alpha=alpha,gradient.range.quantile=0.95,nrow=2,title.size=0,return.plotlist=T)
pl <- mapply(function(x,y) x+ggtitle(y)+ theme(plot.title = element_text(size=16)),pl,names(tcon$samples),SIMPLIFY=F);
pp <- plot_grid(plotlist=pl[co],nrow=2)
pdf(file='tSNE.BMET10_PC.pdf',width=10,height=4.4); print(pp); dev.off();
png
2
pp
alpha <- 0.8; size <- 2;
#sig1 IER2, SOX9, SOX4, FOS? JUNB BRD2, AHNAK
#sig2 MT-CO2, MT-CO1
tcon$plotPanel(gene='UBE2T',size=size,alpha=alpha,gradient.range.quantile=0.95,nrow=2,title.size=4)
Expression on the sample-specific tSNEs
gns <- c("IER2","JUNB",'SOX4','STMN1','H2AFZ','UBE2T')
gns <- c("IER2",'SOX4','UBE2T')
alpha <- 0.8; size <- 3
pl <- lapply(gns,function(g) {
pl <- tcon$plotPanel(gene=g,size=size,alpha=alpha,gradient.range.quantile=0.95,nrow=1,title.size=0,return.plotlist=T)[co];
#pl <- mapply(function(x,y) x+ggtitle(y)+ theme(plot.title = element_text(size=16)),pl,names(tcon$samples),SIMPLIFY=F);
})
pp <- plot_grid(plotlist=unlist(pl,recursive=F),nrow=length(gns))
pdf(file='tSNE.genes.pdf',width=20,height=6); print(pp); dev.off();
png
2
pp
PC overlap
listToOverlapMatrix2=function(res,counts=NULL){
lc=length(res)
stat=matrix(rep(0,lc*lc),lc,lc)
for( i in seq(lc)){
for (j in seq(lc)){
if( i!=j){
stat[i,j]=length(intersect(res[[i]],res[[j]]))/length(union(res[[i]],res[[j]]))
if (!is.null(counts)){stat[i,j]=length(intersect(res[[i]],res[[j]])) }
}
}
}
colnames(stat)=names(res)
rownames(stat)=names(res)
return(stat)
}
topgnl <- lapply(pcl,function(x) na.omit((rownames(x)[order(abs(x[,1]),decreasing =T)])[1:200]))
om <- listToOverlapMatrix2(topgnl); diag(om) <- NA;
#pheatmap(rr,na_col = "gray50",filename='de.IB.overlap.pdf',height=4.3,width=5,treeheight_row=25,treeheight_col=25)
hm <- pheatmap::pheatmap(om,na_col = "gray50",treeheight_row=25,treeheight_col=25)
pdf(file='overlap.heatmap.pdf',width=6,height=5); print(hm); dev.off();
hm
intersect(topgnl[['BMET1-Tumor']],topgnl[['BMET5-Tumor']])
intersect(topgnl[['BMET10-Tumor']],topgnl[['BMET5-Tumor']])
Calculate go enrichments
topgnl <- lapply(pcl,function(x) (rownames(x)[order(abs(x[,1]),decreasing =T)])[1:200])
topgnl$intersect <- intersect(topgnl[['BMET10-Tumor']],topgnl[['BMET5-Tumor']])
#topgnl <- unlist(lapply(1:ncol(pcl[[1]]),function(i) setNames(lapply(pcl,function(x) (rownames(x)[order(abs(x[,i]),decreasing =T)])[1:200]) ,paste(names(pcl),i,sep='_'))),recursive=F)
#topgnl$intersect <- intersect(topgnl[['BMET10-Tumor_1']],topgnl[['BMET5-Tumor_1']])
topgnl$all <- common.genes;
topgnl.entrez <- lapply(topgnl, bitr, 'SYMBOL', 'ENTREZID', org.Hs.eg.db) %>% lapply(`[[`, "ENTREZID")
common.genes.entrez <- topgnl.entrez$all; topgnl.entrez$all <- NULL;
top.gos <- lapply(topgnl.entrez,enrichGOOpt,universe=common.genes.entrez, ont='BP', goData=go_datas[['BP']], readable=T, OrgDB=org.Hs.eg.db)
cx <-gos.cluster(top.gos[-grep('intersect',names(top.gos))],n.clusters=10,max.pval=0.05)
mu <- Heatmap(as.matrix(cx$summary),col=cols$up,border=T,show_row_dend=F,show_column_dend=F, heatmap_legend_param = list(title = 'Z score'), row_names_max_width = unit(8, "cm"),row_names_gp = gpar(fontsize = 10),)
pdf(file='tumor.pc.functions.pdf',width=7,height=4.5); draw(mu, heatmap_legend_side = "left"); dev.off();
mu
names(cx$clusters)[cx$clusters==cx$cluster['epithelial cell differentiation']]
[1] "mRNA processing"
[2] "RNA splicing"
[3] "RNA splicing, via transesterification reactions"
[4] "RNA splicing, via transesterification reactions with bulged adenosine as nucleophile"
[5] "mRNA splicing, via spliceosome"
[6] "epithelial cell differentiation"
[7] "response to starvation"
[8] "cellular response to starvation"
[9] "positive regulation of programmed cell death"
[10] "response to extracellular stimulus"
[11] "cellular response to extracellular stimulus"
[12] "response to nutrient levels"
[13] "negative regulation of cell proliferation"
[14] "cellular response to nutrient levels"
[15] "response to glucocorticoid"
[16] "positive regulation of apoptotic process"
[17] "response to steroid hormone"
[18] "cellular response to hormone stimulus"
[19] "regulation of epithelial cell proliferation"
[20] "epithelial cell proliferation"
[21] "cellular response to external stimulus"
[22] "type I interferon signaling pathway"
[23] "cellular response to type I interferon"
[24] "fat cell differentiation"
[25] "response to corticosteroid"
[26] "muscle structure development"
[27] "response to type I interferon"
[28] "negative regulation of multi-organism process"
[29] "response to oxidative stress"
[30] "cellular response to oxidative stress"
[31] "regulation of fat cell differentiation"
[32] "response to cAMP"
[33] "ATF6-mediated unfolded protein response"
[34] "response to growth factor"
[35] "female pregnancy"
[36] "head development"
[37] "cellular response to metal ion"
[38] "maintenance of protein localization in endoplasmic reticulum"
[39] "cellular response to lipid"
[40] "alpha-linolenic acid metabolic process"
[41] "negative regulation of transcription from RNA polymerase II promoter in response to stress"
[42] "response to toxic substance"
[43] "response to purine-containing compound"
[44] "cellular response to growth factor stimulus"
[45] "cellular response to calcium ion"
[46] "response to inorganic substance"
[47] "multi-multicellular organism process"
[48] "response to ketone"
[49] "regulation of cellular amide metabolic process"
[50] "response to antibiotic"
[51] "regulation of translation"
[52] "regulation of transcription from RNA polymerase II promoter in response to stress"
[53] "cellular response to organic cyclic compound"
[54] "response to mechanical stimulus"
[55] "regulation of DNA-templated transcription in response to stress"
[56] "cellular response to inorganic substance"
[57] "brain development"
[58] "aging"
[59] "negative regulation of translation"
[60] "response to mineralocorticoid"
[61] "cellular response to antibiotic"
[62] "cellular response to glucose starvation"
[63] "response to organophosphorus"
[64] "acylglycerol homeostasis"
[65] "triglyceride homeostasis"
[66] "cellular response to transforming growth factor beta stimulus"
[67] "carboxylic acid biosynthetic process"
[68] "maintenance of protein localization in organelle"
[69] "organic acid biosynthetic process"
[70] "response to transforming growth factor beta"
[71] "negative regulation of cellular amide metabolic process"
[72] "muscle tissue development"
[73] "response to calcium ion"
[74] "regulation of transmembrane receptor protein serine/threonine kinase signaling pathway"
[75] "cellular response to glucocorticoid stimulus"
[76] "response to peptide"
[77] "transmembrane receptor protein serine/threonine kinase signaling pathway"
[78] "cellular response to amino acid starvation"
[79] "positive regulation of neuron death"
[80] "regulation of response to oxidative stress"
[81] "response to wounding"
[82] "antigen processing and presentation of exogenous peptide antigen via MHC class I, TAP-dependent"
[83] "cell-cell adhesion"
[84] "cellular response to corticosteroid stimulus"
[85] "positive regulation of long-term synaptic potentiation"
[86] "metal ion homeostasis"
[87] "regulation of systemic arterial blood pressure"
[88] "cellular divalent inorganic cation homeostasis"
[89] "ion homeostasis"
[90] "muscle cell differentiation"
[91] "transition metal ion homeostasis"
[92] "divalent inorganic cation homeostasis"
[93] "Fc receptor signaling pathway"
[94] "ephrin receptor signaling pathway"
[95] "cation homeostasis"
[96] "inorganic ion homeostasis"
[97] "cellular metal ion homeostasis"
[98] "antimicrobial humoral immune response mediated by antimicrobial peptide"
[99] "regulation of long-term synaptic potentiation"
[100] "striated muscle cell differentiation"
[101] "antimicrobial humoral response"
[102] "regulation of translational initiation"
[103] "striated muscle tissue development"
[104] "transforming growth factor beta receptor signaling pathway"
[105] "response to peptide hormone"
[106] "muscle organ development"
[107] "response to progesterone"
[108] "response to oxygen levels"
[109] "response to hydrogen peroxide"
[110] "forebrain development"
[111] "response to hypoxia"
[112] "response to decreased oxygen levels"
[113] "negative regulation of fat cell differentiation"
[114] "skeletal muscle cell differentiation"
[115] "cholesterol homeostasis"
[116] "sterol homeostasis"
[117] "protein kinase B signaling"
[118] "skeletal muscle tissue development"
[119] "response to metal ion"
[120] "female sex differentiation"
[121] "skeletal muscle organ development"
[122] "detoxification of copper ion"
[123] "stress response to copper ion"
[124] "detoxification of inorganic compound"
[125] "stress response to metal ion"
[126] "cellular response to copper ion"
[127] "cellular response to zinc ion"
cx$joint[cx$joint$Description=='epithelial cell differentiation',]
cx$joint[cx$joint$Description=='transforming growth factor beta receptor signaling pathway',]
cx$joint[cx$joint$Description=='vasculature development',]
cx$joint[cx$joint$Description=="stress response to metal ion",]
Clustering-based analysis
tcon$findCommunities(method=leiden.community,resolution=0.6)
alpha <- 0.3; size <- 2;
tcon$embedding <- tcon$misc$embeddings$lv
n1 <- tcon$plotGraph(color.by='sample',alpha=alpha,size=size,mark.groups=T,plot.na=F)
#n2 <- tcon$plotGraph(gene='FOS',alpha=alpha,size=size,mark.groups=F,plot.na=F)
n2 <- tcon$plotGraph(alpha=alpha,size=size,mark.groups=T,plot.na=F)
plot_grid(plotlist=list(n1,n2),nrow=1)
tfac <- tcon$clusters$leiden$groups
levels(tfac) <- c('1','2','3','3')
tcon.de <- tcon$getDifferentialGenes(groups=tfac,n.cores=30,append.auc=TRUE,z.threshold=0,upregulated.only=T)
Calculate enrichments
tgos <- calculate.gos(lapply(tcon.de,function(de) { de$pvalue <- de$PValue; de$padj <- de$PAdj; de$log2FoldChange <- de$M; rownames(de) <- de$Gene; list(res=de) }),n.top.genes = 100)
'select()' returned 1:1 mapping between keys and columns
7% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
5% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
14.13% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
17% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
11% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
14.13% of input gene IDs are fail to map...'select()' returned 1:1 mapping between keys and columns
7.12% of input gene IDs are fail to map...
Calculate and plot clusters:
cols <- list(up=colorRamp2(c(0, 6), c("grey98", "red")),down=colorRamp2(c(0, 6), c("grey98", "blue")))
n.clusters <- 20; max.pval <- 0.05;
cx <- lapply(tgos,gos.cluster,n.clusters=n.clusters,max.pval=max.pval)
A small version of the hetmap, for the main figure
source("~/m/p2/conos/R/plot.R")
genes <- c('MS4A1','CD79A','CD79B','MZB1','VPREB3','SEC11C','IGLL5','GNLY','GZMB','LILRA4','AZU1','MPO','FCN1','IER3','C5AR1','IGJ','SOX4','STMN1','MYL9','HBD','GZMK','AR','KLK2')
pp <- plotDEheatmap(scon,nfac,annot.de,n.genes.per.cluster = 20 ,show.gene.clusters=T, column.metadata.colors = list(clusters=typefc.pal), order.clusters = T, additional.genes = genes, labeled.gene.subset = genes, min.auc = 0.6,use_raster = T,raster_device = "CairoPNG")
pp